import pandas as pd
import numpy as np
from copy import deepcopy
from ast import literal_eval as make_tuple
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from clustergrammer2 import net
import helper_functions as hf
def cell_umi_count(df: pd.DataFrame) -> pd.Series:
    """Return the total of every column of ``df``.

    Each column is treated as one cell (the matrices in this file are
    loaded with cells as columns), so the result is the per-cell sum of
    all rows present in ``df``.

    Args:
        df: Matrix whose columns are to be summed.

    Returns:
        A Series indexed by the column labels of ``df`` holding each
        column's sum. Duplicate column labels are kept as separate
        entries, and an empty frame yields an empty Series.
    """
    # A single vectorized reduction replaces the per-column Python loop:
    # same values and index, but no per-column lookup, and it stays correct
    # when labels repeat (``df[label]`` would return a DataFrame there).
    return df.sum(axis=0)
# Load the ADT and GEX matrices into one dict keyed by data type.
# Column labels are stored in the parquet files as strings, so each one is
# parsed back into a Python object with literal_eval (imported as make_tuple).
df = {
    'adt': pd.read_parquet('../data/10k_pbmc_feature_v3-chem_v3.0.0-cr/processed_data/adt-cat.parquet'),
}
df['adt'].columns = list(map(make_tuple, df['adt'].columns))
df['adt'].shape
df['gex'] = pd.read_parquet('../data/10k_pbmc_feature_v3-chem_v3.0.0-cr/processed_data/gex-cat_100-var.parquet')
df['gex'].columns = list(map(make_tuple, df['gex'].columns))
df['gex'].shape
%%time
# Scale every GEX column by its own total so each column sums to 1.
# The totals cover only the rows present in df['gex'].
ser_sum = cell_umi_count(df['gex'])
df['gex'] = df['gex'] / ser_sum
# Sanity check: the shape is unchanged and the first column sums should be ~1.
print(df['gex'].shape)
print(df['gex'].sum(axis=0).head())
# Load the ADT matrix into the shared clustergrammer2 `net` object, then
# request its widget.
# NOTE(review): presumably widget() renders an interactive heatmap when it is
# the last expression of a notebook cell — confirm against clustergrammer2.
net.load_df(df['adt'])
net.widget()
# Top 100 variable genes
# Load the GEX matrix (df['gex'] in its current state) into `net`, replacing
# whatever matrix was loaded before.
net.load_df(df['gex'])
# Z-score along the row axis, then clip to [-5, 5].
# NOTE(review): presumably rows are genes and clipping limits outliers'
# influence on the color scale — confirm.
net.normalize(axis='row', norm_type='zscore')
net.clip(-5,5)
# NOTE(review): presumably renders the interactive heatmap for the matrix
# prepared above — confirm against clustergrammer2.
net.widget()